In this sub-vignette we present the analysis and source code for Figure 2. This sub-vignette can be built along with all others, to generate all figures, by running CLLCytokineScreen2021.Rmd.
Load libraries
library(RColorBrewer)
library(ggrepel)
library(patchwork)
library(ggplot2)
library(tidyr)
library(ggbeeswarm)
library(magrittr)
library(pheatmap)
library(ggfortify)
library(gridExtra)
library(DESeq2)
library(survival)
library(survminer)
library(glmnet)
library(ConsensusClusterPlus)
library(clusterProfiler)
library(org.Hs.eg.db)
library(msigdbr)
library(dplyr)
library(tidyverse)
Set plotting directory
plotDir = ifelse(exists(".standalone"), "", "../../inst/figs/")
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
## Warning in dir.create(plotDir): '..\..\inst\figs' existiert bereits
Raw
#df: tibble containing all screening viability data
load( "../../data/df.RData")
#patMeta: tibble containing all patient genetic data
load( "../../data/patMeta.RData")
#LDT: tibble containing Lymhpocyte DOubling Times for patients in screen
load( "../../data/LDT.RData")
#survT: tibble containing disease progression data for patients in screen
load( "../../data/survT.RData")
#dds_smp: DESeqDataSet containing RNA counts and assosciated meta data for matched samples to those in screen
load( "../../data/dds_smp.RData")
#From tsvs
#screening data
df <- read.table(file= "../../inst/extdata/df.txt",header = TRUE) %>% as_tibble()
#Patient meta data
patMeta <- read.table(file= "../../inst/extdata/patMeta.txt",header = TRUE) %>% as_tibble()
#Lymphocyte Doubling Times
LDT <- read.table(file= "../../inst/extdata/LDT.txt",header = TRUE) %>% as_tibble()
#Survival times
survT <- read.table(file= "../../inst/extdata/survT.txt",header = TRUE) %>% as_tibble()
#RNA data
rna_countsMatrix <- read.table(file= "../../inst/extdata/rna_countsMatrix.txt", header = TRUE)
coldata_rna <- read.table(file="../../inst/extdata/coldata_rna.txt", header= TRUE)
rowdata_rna <- read.table(file= "../../inst/extdata/rowdata_rna.txt", header= TRUE)
#Arrange data for analysis
#screening data
df$Cytokine <- factor(df$Cytokine, levels= c("No Cytokine",
"Resiquimod",
"IL-4",
"TGF-b1",
"IL-1b",
"Interferon gamma",
"SDF-1a",
"sCD40L" ,
"sCD40L+IL-4",
"soluble anti-IgM",
"CpG ODN",
"IL-6",
"IL-10",
"IL-21",
"HS-5 CM",
"IL-15",
"BAFF",
"IL-2" ))
#patMeta
patMeta[sapply(patMeta, is.character)] <- lapply(patMeta[sapply(patMeta, is.character)], as.factor)
patMeta[sapply(patMeta, is.integer)] <- lapply(patMeta[sapply(patMeta, is.integer)], as.factor)
patMeta$PatientID <- as.character(patMeta$PatientID)
#RNA
#assemble deseq object
rownames(coldata_rna) <- coldata_rna$PatientID
dds_smp <- DESeqDataSetFromMatrix(rna_countsMatrix, coldata_rna, design =~1)
rowData(dds_smp) <- rowdata_rna
Process data
# Subset data to Cytokine only treatments, no drugs
df_full<-df
df <- dplyr::filter(df, Drug == "DMSO", Cytokine != "No Cytokine")
source("../../R/themes_colors.R")
deckel function: to set limits of scaling factor. Accepts a number x, and two numeric limits, lower and upper.
deckel <- function(x, lower = -Inf, upper = +Inf) ifelse(x<lower, lower, ifelse(x>upper, upper, x))
medianCenter_MadScale function: to scale viability values according to MAD and then center values at zero. Maximum/minimum size of scaling factor set with deckel (above). Accepts a vector x to scale.
medianCenter_MadScale <- function(x) {
s=0
(x - s) / deckel(mad(x, center = s), lower = 0.05, upper = 0.2)
}
scaleCytResp function: to apply medianCenter_MadScale row wise to viability matrix. Accepts x, a matrix of log(viability) values.
scaleCytResp <- function(x) t(apply(x, 1, medianCenter_MadScale))
Hierarchical clustering function: To provide cluster function for running ConsensusClusterPlus. Accepts this_dist, a dissimilarity structure as produced by dist and k, the number of clusters to assign.
myclusterfunction = function(this_dist, k){
#run hierarchical cluster analysis on dissimilarity structure this_dist
hc = hclust(this_dist)
#cut cut tree into k groups
assignment = cutree(hc, k)
return(assignment)
}
Euclidean Distance function: To provide distance function for running ConsensusClusterPlus. Calculates Euclidean distances for matrix x.
myDistFunc = function(x){ dist(x, method="euclidean")}
Factor2Ind: To generate indicator matrix from a factor. Given a factor x, create an indicator matrix of dimension length(x) multiplied by nlevels(x)-1, dropping the column corresponding to the baseline level (by default the first level is used as baseline).
factor2ind <- function(x, baseline)
{
xname <- deparse(substitute(x))
n <- length(x)
x <- as.factor(x)
if(!missing(baseline)) x <- relevel(x, baseline)
X <- matrix(0, n, length(levels(x)))
X[(1:n) + n*(unclass(x)-1)] <- 1
X[is.na(x),] <- NA
dimnames(X) <- list(names(x), paste(xname, levels(x), sep = ":"))
return(X[,-1,drop=FALSE])
}
Function for multinomial regression: To perform multinomial regression to identify genetic features that are predictors of cluster assignment. Provide a feature matrix X and a response matrix y, and specify method (regression method), repeats (number of repeats of the regression) and folds (number of folds to split the data into for cross validation).
runGlm.multi <-
function(X, y, method = "ridge", repeats=20, folds = 3) {
modelList <- list()
lambdaList <- c()
coefMat <-
lapply(unique(y), function(n) {
mat <- matrix(NA, ncol(X), repeats)
rownames(mat) <- colnames(X)
mat
})
names(coefMat) <- unique(y)
alpha = switch(method, lasso = 1, ridge = 0, stop("Please provide a valid method: lasso or ridge"))
for (i in seq(repeats)) {
#balanced sampling
vecFold <- mltools::folds(y, nfolds = folds, stratified = TRUE, seed = i*1996)
res <- cv.glmnet(X, y, type.measure = "class",
foldid = vecFold, alpha = alpha, standardize = FALSE,
intercept = TRUE, family = "multinomial")
lambdaList <- c(lambdaList, res$lambda.min)
modelList[[i]] <- res
coefModel <- coef(res, s = "lambda.min")
for (n in names(coefModel)) {
coefMat[[n]][,i] <- coefModel[[n]][-1]
}
}
list(modelList = modelList, lambdaList = lambdaList, coefMat = coefMat)
}
Sum coefficients: to drop all features from multinomial regression that don’t meet specified cut off criteria, and gather coefficients for all remaining other features, for all repeats of the regression. Accepts a matrix coefMat, plus numeric values for coefCut, the minimum value of that the average coefficient should be, and freqCut, the minimum proportion of repeats that a coefficient should be significant.
sumCoef <- function(coefMat, coefCut = 0, freqCut =1) {
meanCoef <- rowMeans(abs(coefMat))
freqCoef <- rowMeans(coefMat != 0)
subMat <- coefMat[meanCoef > coefCut & freqCoef >= freqCut,,drop=FALSE]
eachTab <- data.frame(subMat) %>%
rownames_to_column("feature") %>% gather(key = "rep",value = "coef",-feature) %>%
mutate(rep = gsub("X","",rep))
return(eachTab)
}
Heatmap of all scaled log(viability) values, for all patient samples and stimuli.
In the code below, the viability data is normalised to DMSO controls and each row is scaled according to MAD. Limits are applied to scaling factor for optimal visualisation. The heatmap is plotted using pheatmap: the ordering of the columns (patient samples) is obtained from the dendrogram that results from running ConsensusClusterPlus using hierarchical clustering with the Euclidean metric. The rows are globally ordered using the dendrogram order produced by hclust with default branch arrangement.
########### Viability matrix ################
#make viability matrix for cytokine treatments for patients
viab.mat <- dplyr::select(df,
PatientID,
Log,
Cytokine) %>%
tidyr::spread(Cytokine, Log) %>%
as.data.frame()
#make PatientID the row names
rownames(viab.mat) <- unlist(viab.mat[,1]) # the first row will be the header
viab.mat <- dplyr::select(viab.mat,-PatientID)
#Transform matrix
viab.mat <- t(viab.mat)
#keep unscaled matrix
viab.mat.unscaled <- viab.mat
#run scaleCytResp on viability matrix
viab.mat <- scaleCytResp(viab.mat)
#apply deckel function to matrix
#Calculate dendrogram
breaks <- c(seq(-3, 3, length.out = 101))
viab.mat.lims <- deckel(viab.mat,
lower = dplyr::first(breaks),
upper = dplyr::last(breaks))
Run consensus clustering
results = ConsensusClusterPlus(d = viab.mat.lims,
maxK = 7,
reps = 10000,
pItem = 0.8,
pFeature = 1,
clusterAlg = "myclusterfunction",
distance = "myDistFunc",
plot = NULL,
seed = 1386,
finalLinkage = "complete",
innerLinkage = "complete")
## end fraction
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
Get cluster annotations
#Extract consensus clusters for k = 4
clusters <-
results[[4]][["consensusClass"]] %>%
tibble::enframe() %>%
dplyr::rename(PatientID="name", Cluster="value")
#get table of clusters
clusters <-
clusters %>%
dplyr::select(PatientID, Cluster)
#get matrix
cluster_matrix <- as.dist( 1 - results[[4]]$ml )
#add clusters to patMeta
patMeta_cl <- left_join(clusters, patMeta, by = "PatientID")
Arrange annotations for heatmap
#Sort heatmap annotations
#Select annotations from patMeta_cl
Heatmap_Annotation <- dplyr::select(patMeta_cl,
PatientID,
IGHV.status,
Methylation_Cluster,
trisomy12,
TP53,
ATM,
treatment,
gender,
Cluster) %>%
#Adjust names/levels for legend
mutate(treatment=case_when(treatment==0~"No",
treatment==1~"Yes")) %>%
mutate(gender=case_when(gender=="f"~"Female",
gender=="m"~"Male")) %>%
mutate(IGHV.status=case_when(IGHV.status=="U"~"Unmutated",
IGHV.status=="M"~"Mutated"))%>%
mutate(Methylation_Cluster=case_when(Methylation_Cluster=="LP"~"Low",
Methylation_Cluster=="IP"~"Intermediate",
Methylation_Cluster=="HP"~"High"))
#Rename columns for legend
colnames(Heatmap_Annotation) <- c("PatientID",
"IGHV status",
"Methylation Cluster",
"trisomy 12",
"TP53",
"ATM",
"Pretreated",
"Sex",
"Cluster")
#make Pat IDs the rownames
Heatmap_Annotation <- as.data.frame(Heatmap_Annotation)
rownames(Heatmap_Annotation) <- unlist(Heatmap_Annotation$PatientID)
#Tidy Up Heatmap Annotation table
Heatmap_Annotation$Cluster <- as.factor(Heatmap_Annotation$Cluster)
Heatmap_Annotation <- dplyr::select(Heatmap_Annotation,-"PatientID")
Define aesthetics for heatmap and annotations
########### Set Heatmap Aesthetics ###############
#Generate red-blue divergent palette
breaks <- c(seq(-3, 3, length.out = 101)) %>% `names<-`(
colorRampPalette(c(palblues,
"white", "white", "white",
palreds))(101))
#Specify colors of annotations
ann_colors =
list(
"IGHV status" = c("Unmutated"=IGHV[1],
"Mutated"=IGHV[2]),
"Methylation Cluster"= c("Low"=Methylation_cluster[1],
"Intermediate"=Methylation_cluster[2],
"High"=Methylation_cluster[3]),
"Sex" = c("Female"=Sex[1],
"Male"=Sex[2]),
"Pretreated" = c("No"=Mutant[1],
"Yes"=Mutant[2]),
"trisomy 12" = c("1"=Mutant[2],
"0"=Mutant[1]),
"ATM" = c("1"=Mutant[2],
"0"=Mutant[1]),
"TP53" = c("1"=Mutant[2],
"0"=Mutant[1]),
"Cluster" = c("1"=colors[1],
"2"=colors[2],
"3"=colors[3],
"4"=colors[4]))
Plot Figure
rownames(viab.mat.lims)<-rownames(viab.mat.lims) %>%
gsub("IL-1b","IL1\u03B2",.)%>%
gsub("SDF-1a","SDF-1\u03B1",.)%>%
gsub("Interferon gamma","Interferon \u03B3",.)%>%
gsub("TGF-b1","TGF-\u03B21",.)
#Plot heatmap
Fig2A <-
pheatmap(viab.mat.lims,
scale = "none",
clustering_method = "complete",
cluster_cols = TRUE,
show_colnames = FALSE,
cutree_cols = 4,
clustering_distance_cols = cluster_matrix,
annotation_col = Heatmap_Annotation,
annotation_colors = ann_colors,
cellheight = 22,
cellwidth = 4,
breaks = breaks,
color= names(breaks),
border_color=NA,
fontsize=10,
fontsize_row=16,
legend_breaks = c(-3,0,3) )
Fig2A
Lymphoctye doubling times (LDT) stratified by patient clusters
#plot LDT stratified by Patient Clusters
Fig2B <-
patMeta_cl %>%
#join patient meta data with clusters, to LDT dataframe
left_join( dplyr::select(LDT, PatientID, doubling.time), by=c("PatientID"="PatientID")) %>%
dplyr::filter(!is.na(doubling.time)) %>%
dplyr::mutate(Cluster=as.factor(Cluster)) %>%
ggplot(aes(x=Cluster, y=doubling.time, group=Cluster, fill=Cluster)) +
geom_boxplot() +
geom_beeswarm() +
scale_fill_manual(values=colors[1:4]) +
scale_y_log10() +
guides(fill="none") +
ylab("LDT in years") +
t2 +
stat_compare_means(method="t.test", comparison=list(c(1,2),c(3,4)), size=6, label.x = 1.3, label.y=2.1)
Fig2B
Here we plot KM curves to show TTT stratified by cluster. Clinical follow-up information to calculate OS was available for all 192 patients. For 188 patients treatment information after sample collection was available.
######Arrange data######
clusters.surv <- left_join(clusters, survT, by = "PatientID")
#Add meta data to heatmap clusters
clusters.surv <- left_join(clusters.surv, patMeta, by="PatientID")
Univariate Cox Proportional Hazards
Cluster 1 as reference
coxph_summ <- clusters.surv%>%
mutate(Cluster=factor(Cluster))%>%
coxph(Surv( TTT, treatedAfter) ~ factor2ind(Cluster, 1), data=.) %>%
summary()
coxph_summ$coefficients %>% round(5)
## coef exp(coef) se(coef) z Pr(>|z|)
## factor2ind(Cluster, 1)Cluster:2 0.47981 1.61577 0.28715 1.67093 0.09474
## factor2ind(Cluster, 1)Cluster:3 -0.24052 0.78622 0.27089 -0.88786 0.37461
## factor2ind(Cluster, 1)Cluster:4 -1.29254 0.27457 0.33893 -3.81361 0.00014
C1vsC2_p <- coxph_summ$coefficients[1,5] %>% round(3)
C1vsC2_p
## [1] 0.095
Cluster 3 as reference
coxph_summ <- clusters.surv%>%
mutate(Cluster=factor(Cluster))%>%
coxph(Surv( TTT, treatedAfter) ~ factor2ind(Cluster, 3), data=.) %>%
summary()
coxph_summ$coefficients %>% round(5)
## coef exp(coef) se(coef) z Pr(>|z|)
## factor2ind(Cluster, 3)Cluster:1 0.24052 1.27191 0.27089 0.88786 0.37461
## factor2ind(Cluster, 3)Cluster:2 0.72033 2.05510 0.33097 2.17639 0.02953
## factor2ind(Cluster, 3)Cluster:4 -1.05202 0.34923 0.37578 -2.79957 0.00512
Cluster 3 vs C1+2
coxph_summ <- clusters.surv%>%
filter(Cluster%in%c(1,2,3)) %>%
dplyr::mutate(Cluster=factor(Cluster), Cluster_12vs3=ifelse(Cluster%in%c(1,2),1,0))%>%
coxph(Surv( TTT, treatedAfter) ~ factor2ind(Cluster_12vs3, 1), data=.) %>%
summary()
coxph_summ$coefficients %>% round(5)
## coef exp(coef) se(coef) z Pr(>|z|)
## factor2ind(Cluster_12vs3, 1) 0.35273 1.42295 0.25861 1.36394 0.17259
Cluster 4 as reference
coxph_summ <- clusters.surv%>%
mutate(Cluster=factor(Cluster))%>%
coxph(Surv( TTT, treatedAfter) ~ factor2ind(Cluster, 4), data=.) %>%
summary()
coxph_summ$coefficients %>% round(5)
## coef exp(coef) se(coef) z Pr(>|z|)
## factor2ind(Cluster, 4)Cluster:1 1.29254 3.64203 0.33893 3.81361 0.00014
## factor2ind(Cluster, 4)Cluster:2 1.77235 5.88467 0.38930 4.55263 0.00001
## factor2ind(Cluster, 4)Cluster:3 1.05202 2.86344 0.37578 2.79957 0.00512
C3vsC4_p<-coxph_summ$coefficients[3,5] %>% round(3)
C3vsC4_p
## [1] 0.005
Multivariate Cox Proportional Hazards
Cluster 1 as reference
coxph_summ <- clusters.surv%>%
mutate(Cluster=factor(Cluster))%>%
coxph(Surv( TTT, treatedAfter) ~ factor2ind(Cluster, 1)+IGHV.status+trisomy12+TP53, data=.) %>%
summary()
coxph_summ$coefficients
## coef exp(coef) se(coef) z
## factor2ind(Cluster, 1)Cluster:2 0.55573674 1.7432248 0.3164764 1.7560134
## factor2ind(Cluster, 1)Cluster:3 0.03979159 1.0405939 0.2981304 0.1334704
## factor2ind(Cluster, 1)Cluster:4 -0.78031774 0.4582604 0.3983280 -1.9589831
## IGHV.statusU 0.55191581 1.7365768 0.2725341 2.0251255
## trisomy121 -0.13357407 0.8749627 0.3561725 -0.3750263
## TP531 1.38977496 4.0139467 0.2607176 5.3305760
## Pr(>|z|)
## factor2ind(Cluster, 1)Cluster:2 7.908611e-02
## factor2ind(Cluster, 1)Cluster:3 8.938214e-01
## factor2ind(Cluster, 1)Cluster:4 5.011477e-02
## IGHV.statusU 4.285447e-02
## trisomy121 7.076409e-01
## TP531 9.790173e-08
C1vsC2_p_mv<-coxph_summ$coefficients[1,5] %>% round(3)
C1vsC2_p_mv
## [1] 0.079
Cluster 4 as reference
coxph_summ <- clusters.surv%>%
mutate(Cluster=factor(Cluster))%>%
coxph(Surv( TTT, treatedAfter) ~ factor2ind(Cluster, 4)+IGHV.status+trisomy12+TP53, data=.) %>%
summary()
coxph_summ$coefficients
## coef exp(coef) se(coef) z
## factor2ind(Cluster, 4)Cluster:1 0.7803177 2.1821655 0.3983280 1.9589831
## factor2ind(Cluster, 4)Cluster:2 1.3360545 3.8040051 0.4618269 2.8929770
## factor2ind(Cluster, 4)Cluster:3 0.8201093 2.2707481 0.3975967 2.0626662
## IGHV.statusU 0.5519158 1.7365768 0.2725341 2.0251255
## trisomy121 -0.1335741 0.8749627 0.3561725 -0.3750263
## TP531 1.3897750 4.0139467 0.2607176 5.3305760
## Pr(>|z|)
## factor2ind(Cluster, 4)Cluster:1 5.011477e-02
## factor2ind(Cluster, 4)Cluster:2 3.816093e-03
## factor2ind(Cluster, 4)Cluster:3 3.914435e-02
## IGHV.statusU 4.285447e-02
## trisomy121 7.076409e-01
## TP531 9.790173e-08
C3vsC4_p_mv<-coxph_summ$coefficients[3,5] %>% round(3)
C3vsC4_p_mv
## [1] 0.039
Plot figure
##Run survfit on TTT
fit <- survfit(Surv(TTT, treatedAfter)~Cluster, clusters.surv)
t_plot<- t2+theme(plot.title=element_blank(), axis.title.x=element_blank(), legend.key = element_blank())
t_table<- t2+theme(plot.title=element_blank(), panel.grid.major.x = element_blank())
Fig2_KM_surv <-
ggsurvplot(fit,
surv.median.line = "hv", # Add medians survival
legend.title = "Cluster",# Change legends: title & labels
legend.labs = c("1", "2","3","4"),
pval = FALSE,
risk.table = TRUE,
palette = c(colors[1], colors[2], colors[3], colors[4]),
ggtheme = t_plot,
tables.theme = t_table,
legend = "bottom",
xlab="Time in years",
ylab="Time to next treatment (probability)",
break.time.by=1,
xlim=c(0,6.8))
Fig2_KM_surv_plot <-
Fig2_KM_surv$plot +
annotate(geom="text", x=6.2, y=0.5, label=C3vsC4_p, color="black", size=6) +
geom_segment(x = 5.7, xend = 5.7, y = 0.72, yend = 0.28, color="black") +
geom_segment(x = 5.6, xend = 5.7, y = 0.72, yend = 0.72, color="black") +
geom_segment(x = 5.6, xend = 5.7, y = 0.281, yend = 0.281, color="black") +
annotate(geom="text", x=6.8, y=0.23, label=C1vsC2_p, color="black", size=6) +
geom_segment(x = 6.3, xend = 6.3, y = 0.33, yend = 0.13, color="black") +
geom_segment(x = 6.2, xend = 6.3, y = 0.33, yend = 0.33, color="black") +
geom_segment(x = 6.2, xend = 6.3, y = 0.131, yend = 0.131, color="black")
Fig2C <- wrap_elements(Fig2_KM_surv_plot + Fig2_KM_surv$table + plot_layout(ncol=1, heights = c(0.8,0.2)))
Fig2C
Genetic predictors of cluster assignment.
Here we use a multinomial linear model with L1-penalty, implemented in the cvglmnet package. As the dependent variable, the cluster assignment for each patient was used. As input to the model, genetic features with more than 20% missing values were excluded, and only patients with complete annotation are included in the model (n=137). As predictors, the genetic mutations and CNVs (p=39) and IGHV status (coded as 0-1) are used, using 3-fold cross-validation. Misclassification error is used as loss for cross- validation.
Prepare feature matrix
#Generate Matrix
#select features from patient meta file
geneMatrix <-
dplyr::select(patMeta,
-c(gender:treatment)) %>%
#adjust IGHV.status levels U and M to numeric 1 and 0
mutate(IGHV = ifelse(is.na(IGHV.status), NA,
ifelse(IGHV.status == "M", 1, 0)),
#remove old columns and remove Methylation Cluster
IGHV.status=NULL, Methylation_Cluster=NULL ) %>%
#convert factors to numeric
mutate_if(is.factor, as.character) %>%
mutate_at(vars(-PatientID), as.numeric) %>%
#convert to matrix format, with patient IDs as rownames
data.frame() %>%
column_to_rownames("PatientID") %>%
as.matrix()
#Tidy matrix for use in glmnet function
#Remove genes with higher than 20% missing values
geneMatrix <- geneMatrix[,colSums(is.na(geneMatrix))/nrow(geneMatrix) <= 0.2]
#Filter for patients with complete data
geneMatrix.complete <- geneMatrix[complete.cases(geneMatrix),]
#Combine KRAS, NRAS and BRAF mutations into a single column
#set up empty matrix
Ras_Raf <- matrix(NA,
nrow = nrow(geneMatrix.complete),
ncol = 1)
colnames(Ras_Raf) <- "RAS/RAF"
#add RAS/RAF column to matrix
geneMatrix.complete <- cbind(geneMatrix.complete, Ras_Raf)
#Annotate RAS_RAF where where any of KRAS, NRAS or BRAF are mutated
geneMatrix.complete[,"RAS/RAF"] <- ifelse(geneMatrix.complete[,"KRAS"]==1,1,
ifelse(geneMatrix.complete[,"BRAF"]==1,1,
ifelse(geneMatrix.complete[,"NRAS"]==1, 1, 0)))
#remove KRAS, NRAS and BRAF columns
geneMatrix.complete <-
geneMatrix.complete[, colnames(geneMatrix.complete) != "KRAS"]
geneMatrix.complete <-
geneMatrix.complete[, colnames(geneMatrix.complete) != "BRAF"]
geneMatrix.complete <-
geneMatrix.complete[, colnames(geneMatrix.complete) != "NRAS"]
Prepare response matrix
#get Clusters and Patients
y <-
dplyr::select(patMeta_cl, PatientID, Cluster) %>%
column_to_rownames("PatientID")
#Cluster column as.numeric
y$Cluster <- as.numeric(as.character(y$Cluster))
#transform matrix
y <- t(y)
#Match response matrix and feature matrix
y <- y[,rownames(geneMatrix.complete)]
Run multinomial regression
X <- geneMatrix.complete
y <- y
res <- runGlm.multi(X, y, method = "lasso", repeats = 50, folds = 3)
Prepare table of coefficients for plot
coefList <- res$coefMat
#Filter for features whose coefficients meet the frequency and minimum value thresholds
coefTab <- lapply(names(coefList), function(d) {
coefMat <- coefList[[d]]
coefTab <- sumCoef(coefMat=coefMat, freqCut = 0.6, coefCut = 0.35) %>%
mutate(cluster = d)
}) %>% bind_rows()
coefTab %>%
group_by(feature) %>%
summarize() %>%
unlist()
## feature1 feature2 feature3 feature4 feature5 feature6
## "ATM" "gain8q" "IGHV" "POT1" "RAS/RAF" "SF3B1"
## feature7 feature8
## "TP53" "trisomy12"
#Annotate RAS_RAF where where any of KRAS, NRAS or BRAF are mutated
patMeta_cl[,"RAS_RAF"] <- ifelse(patMeta_cl[,"KRAS"]==1,1,
ifelse(patMeta_cl[,"BRAF"]==1,1,
ifelse(patMeta_cl[,"NRAS"]==1, 1, 0)))
patMeta_cl %<>% mutate(Cluster=as.factor(Cluster),
RAS_RAF=as.factor(RAS_RAF))
Coef_Perc_Table<- patMeta_cl %>%
filter(!is.na(IGHV.status)) %>%
group_by(Cluster, IGHV.status) %>%
count() %>%
group_by(Cluster) %>%
mutate(freq = n / sum(n)) %>%
mutate(Percentage=round(freq*100),0) %>%
filter(IGHV.status=="M") %>%
rename(IGHV.mutated=Percentage) %>%
select(Cluster, IGHV.mutated)
Coef_Perc_Table<- patMeta_cl %>%
filter(!is.na(trisomy12)) %>%
group_by(Cluster, trisomy12) %>%
count() %>%
group_by(Cluster) %>%
mutate(freq = n / sum(n)) %>%
mutate(Percentage=round(freq*100),0) %>%
filter(trisomy12=="1") %>%
rename(trisomy12_positive=Percentage) %>%
select(Cluster, trisomy12_positive) %>%
left_join(Coef_Perc_Table, ., by = "Cluster")
Coef_Perc_Table<- patMeta_cl %>%
filter(!is.na(SF3B1)) %>%
group_by(Cluster, SF3B1) %>%
count() %>%
group_by(Cluster) %>%
mutate(freq = n / sum(n)) %>%
mutate(Percentage=round(freq*100),0) %>%
filter(SF3B1=="1") %>%
rename(SF3B1_mut=Percentage) %>%
select(Cluster, SF3B1_mut)%>%
left_join(Coef_Perc_Table, ., by = "Cluster")
Coef_Perc_Table<- patMeta_cl %>%
filter(!is.na(POT1)) %>%
group_by(Cluster, POT1, .drop=FALSE) %>%
count(.drop=FALSE) %>%
group_by(Cluster) %>%
mutate(freq = n / sum(n)) %>%
mutate(Percentage=round(freq*100),0) %>%
filter(POT1=="1") %>%
rename(POT1_mut=Percentage) %>%
select(Cluster, POT1_mut)%>%
left_join(Coef_Perc_Table, ., by = "Cluster")
Coef_Perc_Table<- patMeta_cl %>%
filter(!is.na(ATM)) %>%
group_by(Cluster, ATM) %>%
count() %>%
group_by(Cluster) %>%
mutate(freq = n / sum(n)) %>%
mutate(Percentage=round(freq*100),0) %>%
filter(ATM=="1") %>%
rename(ATM_mut=Percentage) %>%
select(Cluster, ATM_mut)%>%
left_join(Coef_Perc_Table, ., by = "Cluster")
Coef_Perc_Table<- patMeta_cl %>%
filter(!is.na(TP53)) %>%
group_by(Cluster, TP53) %>%
count() %>%
group_by(Cluster) %>%
mutate(freq = n / sum(n)) %>%
mutate(Percentage=round(freq*100),0) %>%
filter(TP53=="1") %>%
rename(TP53_mut=Percentage) %>%
select(Cluster, TP53_mut)%>%
left_join(Coef_Perc_Table, ., by = "Cluster")
Coef_Perc_Table<- patMeta_cl %>%
filter(!is.na(RAS_RAF)) %>%
group_by(Cluster, RAS_RAF, .drop=FALSE) %>%
count(.drop=FALSE) %>%
group_by(Cluster) %>%
mutate(freq = n / sum(n)) %>%
mutate(Percentage=round(freq*100),0) %>%
filter(RAS_RAF=="1") %>%
rename(RAS_RAF_mut=Percentage) %>%
select(Cluster, RAS_RAF_mut)%>%
left_join(Coef_Perc_Table, ., by = "Cluster")
Coef_Perc_Table<- patMeta_cl %>%
filter(!is.na(gain8q)) %>%
group_by(Cluster, gain8q, .drop=FALSE) %>%
count(.drop = FALSE) %>%
group_by(Cluster) %>%
mutate(freq = n / sum(n)) %>%
mutate(Percentage=round(freq*100),0) %>%
filter(gain8q=="1") %>%
rename(gain8q_positive=Percentage) %>%
select(Cluster, gain8q_positive)%>%
left_join(Coef_Perc_Table, ., by = "Cluster")
# Coef_Perc_Table
plotTab <-
coefTab %>%
dplyr::group_by(feature, cluster) %>%
dplyr::summarise(meanCoef = mean(coef),
sdCoef = sd(coef)) %>%
dplyr::arrange(desc(meanCoef)) %>%
ungroup()
## `summarise()` has grouped output by 'feature'. You can override using the
## `.groups` argument.
#update labelling
plotTab <- plotTab %>% mutate(feature = ifelse(feature == "IGHV", "IGHV status",
ifelse(feature == "trisomy12", "trisomy 12",
ifelse(feature == "gain8q", "gain(8q)", feature))))
plotTab$feature <- factor(plotTab$feature,
levels=c("IGHV status",
"trisomy 12",
"SF3B1",
"POT1",
"ATM",
"TP53",
"RAS/RAF",
"gain(8q)"))
plotTab_for_Perc<- plotTab %>%
mutate(feature=case_when(feature=="IGHV status"~"IGHV.mutated",
feature=="trisomy 12"~"trisomy12_positive",
feature=="POT1"~"POT1_mut",
feature=="SF3B1"~"SF3B1_mut",
feature=="gain(8q)"~"gain8q_positive",
feature=="RAS/RAF"~"RAS_RAF_mut",
feature=="TP53"~"TP53_mut",
feature=="ATM"~"ATM_mut"))
Fig2D<-Coef_Perc_Table %>%
pivot_longer(IGHV.mutated:gain8q_positive, names_to = "feature", values_to = "Percentage" ) %>%
left_join(plotTab_for_Perc, by=c("Cluster"="cluster", "feature")) %>%
mutate(meanCoef=if_else(is.na(meanCoef), 0, meanCoef)) %>%
mutate(Cluster=factor(Cluster, levels = rev(c(1,2,3,4))),
feature= factor(feature,
levels=c("IGHV.mutated","trisomy12_positive","POT1_mut","SF3B1_mut", "gain8q_positive","RAS_RAF_mut","TP53_mut","ATM_mut"),
labels=c("IGHV mutated","trisomy 12","POT1","SF3B1","gain8q","KRAS, BRAF or NRAS","TP53","ATM" ))) %>%
ggplot(aes(y=Cluster, x=feature))+
geom_tile(aes(fill=meanCoef),color = "grey")+
scale_fill_gradient2(low ="#003DA5", mid ="white", high = "#A6093D", midpoint = 0)+
geom_text(aes( label=paste0(Percentage, "%")), size=7 )+
t1+
t.leg+
theme(legend.position = "bottom",
legend.key.width = unit(1.7, "cm"),
axis.text.x= element_text(size=fontsize+4, angle=45, vjust=1))+
labs(fill="Coefficient", x="Genetic alteration")
Fig2D
GSEA of differentially expressed genes between clusters.
Here, for the n=49 patient samples for which we have viability data and RNA–Seq data for matching samples, we search for associations of these two data types. Using DESeq2 we regress RNA-Seq count data on the patient clusters C3, C4 (design formula ~ IGHV.status + Cluster). Genes are ranked by their test statistics and Gene Set Enrichment Analysis (GSEA) implementing the fgsea algorithm with the clusterProfiler package is applied to the ranked lists with the Hallmark pathway gene sets from the MSigDB database.
Prepare RNA count data
cytSeq <- dds_smp
#Filter out IGHV genes
cytSeq <- cytSeq[!grepl("IG_", rowData(cytSeq)$biotype),]
#split into 3,4
cytSeq.34 <- cytSeq[,colData(cytSeq)[,"Cluster"] %in% c(3,4)]
#remove patients where IGHV is unknown
cytSeq.34 <- cytSeq.34[,colData(cytSeq.34)[,"IGHV.status"] %in% c("U","M")]
#set order of factors
cytSeq.34$Cluster <- factor(cytSeq.34$Cluster,
levels = c("3","4"))
cytSeq.34$IGHV.status <- factor(cytSeq.34$IGHV.status,
levels = c("U","M"))
design(cytSeq.34) <- ~ IGHV.status + Cluster
Run DESeq
cytSeq.34 <- DESeq(cytSeq.34)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 688 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
Extract results for 3 versus 4
res.ds <-
results(cytSeq.34, contrast = c("Cluster", 3, 4), tidy = TRUE) %>%
dplyr::rename(Symbol = "row") %>%
dplyr::arrange(pvalue)
Add Entrez IDs and readable gene names to results table
#get ensembl ids to Entrez dataframe
ens2entrez <-
rowData(cytSeq.34)[c("entrezgene", "symbol")] %>%
as.data.frame() %>%
rownames_to_column("ENSEMBL") %>%
tibble::as_tibble()
#Join
res.ds <- left_join(res.ds, ens2entrez, by=c("Symbol"="ENSEMBL"))
Get ranks dataframes to feed to fgsea
d <-
dplyr::select(res.ds, entrezgene, stat) %>%
na.omit() %>%
dplyr::group_by(entrezgene) %>%
dplyr::summarize(stat=mean(stat))
## feature 1: numeric vector
geneList <- d$stat
## feature 2: named vector
names(geneList) <- as.character(d$entrezgene)
## feature 3: decreasing order
geneList <- sort(geneList, decreasing = TRUE)
hm2gene <-
msigdbr(species = "Homo sapiens", category = "H") %>%
dplyr::select(gs_name, entrez_gene)
head(hm2gene)
## # A tibble: 6 x 2
## gs_name entrez_gene
## <chr> <int>
## 1 HALLMARK_ADIPOGENESIS 19
## 2 HALLMARK_ADIPOGENESIS 11194
## 3 HALLMARK_ADIPOGENESIS 10449
## 4 HALLMARK_ADIPOGENESIS 33
## 5 HALLMARK_ADIPOGENESIS 34
## 6 HALLMARK_ADIPOGENESIS 35
#tidy up terms
hm2gene$gs_name <- gsub("HALLMARK_", "",hm2gene$gs_name)
hm2gene$gs_name <- gsub("_", " ",hm2gene$gs_name)
hm2gene$gs_name <-gsub("MTORC1 SIGNALING" ,"MTORC1 Signaling",hm2gene$gs_name)
hm2gene$gs_name <-gsub("MYC TARGETS V1", "MYC Targets V1" ,hm2gene$gs_name)
hm2gene$gs_name <-gsub("OXIDATIVE PHOSPHORYLATION", "Oxidative Phosphorylation",hm2gene$gs_name)
hm2gene$gs_name <-gsub("TNFA SIGNALING VIA NFKB", "TNFa Signaling via NFKB" ,hm2gene$gs_name)
hm2gene$gs_name <-gsub("UNFOLDED PROTEIN RESPONSE", "Unfolded Protein Response" ,hm2gene$gs_name)
hm2gene$gs_name <-gsub("UV RESPONSE UP", "UV Response Up",hm2gene$gs_name)
hm2gene$gs_name <-gsub("INTERFERON GAMMA RESPONSE", "Interferon Gamma Response" ,hm2gene$gs_name)
hm2gene$gs_name <-gsub("G2M CHECKPOINT", "G2M Checkpoint",hm2gene$gs_name)
hm2gene$gs_name <-gsub("E2F TARGETS", "E2F Targets" ,hm2gene$gs_name)
hm2gene$gs_name <-gsub("P53 PATHWAY", "P53 Pathway",hm2gene$gs_name)
#run GSEA
set.seed(1996)
gsea.res <- GSEA(geneList, TERM2GENE = hm2gene, by = "fgsea", seed = TRUE)
#make readable with gene names
gsea.res <- setReadable(gsea.res, org.Hs.eg.db, keyType = "ENTREZID")
Show top 10 pathways
#get dataframe of results
gsea.df <- fortify(gsea.res,
showCategory = 10, #how many levels to show
split=NULL)
#order by NES
gsea.df <- dplyr::mutate(gsea.df, "NES" = eval(parse(text="NES")))
idx <- order(gsea.df[["NES"]], decreasing = TRUE)
gsea.df$Description <- factor(gsea.df$Description, levels=rev(unique(gsea.df$Description[idx])))
gsea.df$pos <- 0
Fig2E <-
ggplot(gsea.df,
aes_string(x="NES",
y="Description",
fill="p.adjust")) +
geom_bar(stat = "identity") +
scale_fill_continuous(low=palreds[7],
high=palreds[2],
name = "Adjusted p value",
guide=guide_colorbar(reverse=TRUE)) +
ylab("Hallmark Pathway\n\n") +
xlab("Normalised Enrichment Score") +
ggtitle("Upregulation of gene sets in C3 versus C4") +
t2 +
scale_size(range=c(3, 8)) +
geom_text(aes(label=Description, x = pos), nudge_x = 0.1, hjust = 0, size = 6, color = "white") +
theme(legend.position=c(0.9, 0.2),
legend.background = element_blank(),
legend.box.background = element_rect(size = 0.5),
legend.title = element_text(face='bold',
hjust = 1, size=11),
legend.key = element_blank(),
legend.text = element_text(size=12),
axis.text.y = element_blank(),
axis.ticks.y = element_blank())
Fig2E
design1<-"
11
23
45
"
tp<-ggplot2::theme(plot.tag=element_text(size = 30))
Figure2 <-
wrap_elements(arrangeGrob(Fig2A[[4]])) +tp+
wrap_elements(Fig2B) + tp+
Fig2C + tp+
wrap_elements(Fig2D) + tp+
wrap_elements(Fig2E) + tp+
plot_layout(design=design1,
heights = c(1.6, 1.2, 1.2) ,
widths = c(1,1)) +
plot_annotation(tag_levels = "A", title="Figure 2", theme = theme(title=element_text(size = 20)))
Figure2
patMeta_cl %>%
group_by(Cluster, Methylation_Cluster) %>%
count() %>%
group_by(Cluster) %>%
mutate(freq = n / sum(n)) %>%
plyr::ddply("Cluster", transform, label_ypos=cumsum(freq)) %>%
ggplot(aes(x=Cluster, y=freq, fill=Methylation_Cluster))+
geom_bar(stat="identity")+
ylab("Relative incidence of\n methylation profiles")+
t2
Plot Figure 2D:
Coefficients (feature importance) for each cluster
#average remaining coefficients in dataframe for plotting
#ie show all sig features, the cluster they predict, and the average coefficient
plotTab <-
coefTab %>%
dplyr::group_by(feature, cluster) %>%
dplyr::summarise(meanCoef = mean(coef),
sdCoef = sd(coef)) %>%
dplyr::arrange(desc(meanCoef)) %>%
ungroup()
## `summarise()` has grouped output by 'feature'. You can override using the
## `.groups` argument.
#update labelling
plotTab <- plotTab %>% mutate(feature = ifelse(feature == "IGHV", "IGHV status",
ifelse(feature == "trisomy12", "trisomy 12",
ifelse(feature == "gain8q", "gain(8q)", feature))))
plotTab$feature <- factor(plotTab$feature,
levels=c("IGHV status",
"trisomy 12",
"SF3B1",
"POT1",
"ATM",
"TP53",
"RAS/RAF",
"gain(8q)"))
Cluster.labs <- c("Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4")
names(Cluster.labs) <- c(1, 2, 3, 4)
Fig2D <-
ggplot(plotTab, aes(x=feature, y=meanCoef))+
geom_bar(stat= "identity", aes(fill = cluster))+
geom_errorbar(aes(ymin = meanCoef - sdCoef,
ymax = meanCoef + sdCoef))+
ggtitle("") +
coord_cartesian(ylim=c(-1.5, 1.75))+
geom_hline(yintercept=0)+
ylab("Depleted / Enriched\n")+
xlab("")+
scale_fill_manual(values=colors[1:4])+
t1+
t.leg+
theme(strip.text = element_text(size = fontsize),
axis.text.x= element_text(size=fontsize+4, angle=45, vjust=1))+
facet_wrap(~cluster,
nrow=4,
strip.position = "right",
labeller = labeller(cluster = Cluster.labs))+
guides(fill = "none")
Fig2D
load( "../../data/CLL_PD.RData")
CLL_PD %>%
left_join( patMeta_cl, by="PatientID") %>%
ggplot(aes(x=Cluster, y=CLL_PD,group=Cluster, fill=Cluster ))+
geom_boxplot(outlier.shape = NA)+
geom_beeswarm(aes(shape=CLL_PD_group ))+
t2+
scale_fill_manual(values=colors[1:4]) +
guides(fill="none") +
ylab("CLL Proliferative Drive") +
stat_compare_means(method="t.test", comparison=list(c(1,2),c(3,4)), size=6, label.x = 1.3, label.y=3.1)+
coord_cartesian(clip="off")
x<-left_join(CLL_PD, df, by = "PatientID") %>%
group_by(PatientID,CLL_PD ) %>%
summarise(.groups="keep") %>%
ungroup() %>%
arrange(PatientID) %>%
select(CLL_PD) %>%
unlist()
# Stimuli<-"IL-4"
Stim_PD.corr <- vector("list", length = length(unique(df$Cytokine)))
names(Stim_PD.corr) <- unique(df$Cytokine)
for(Stimuli in unique(df$Cytokine)){
y<-left_join(CLL_PD, df, by = "PatientID") %>%
filter(Cytokine==Stimuli)%>%
group_by(PatientID,Log ) %>%
summarise(.groups="keep") %>%
ungroup() %>%
arrange(PatientID) %>%
select(Log) %>%
unlist()
cor <- cor.test(x, y, conf.level = 0.95, method = c("pearson"))
#convert to table
cor.df <- unlist(cor) %>% as.data.frame()
colnames(cor.df) <- Stimuli
#add corr.test output to drug.corr dataframe
Stim_PD.corr[[Stimuli]] <- cor.df
}
Stim_PD.corr_all <- do.call(cbind, Stim_PD.corr) %>%
rownames_to_column("feature")
Stim_PD.corr_all <- setNames(data.frame(t(Stim_PD.corr_all[,-1])), Stim_PD.corr_all[,1])
Stim_PD.corr_all <-rownames_to_column(Stim_PD.corr_all, var="Cytokine") %>%
mutate(p.value=as.numeric(p.value), estimate.cor=as.numeric(estimate.cor))
Stim_PD.corr_all %<>%
mutate(adj.p.value=p.adjust(p.value, method = "BH"))
ggplot( Stim_PD.corr_all, aes(x=estimate.cor, y=-log10(adj.p.value))) +
# annotate("rect", xmin = 0.0, xmax = Inf, ymin = -Inf, ymax = Inf, fill = palreds[1], alpha = 0.2)+
# annotate("rect", xmin = 0.0, xmax = -Inf, ymin = -Inf, ymax = Inf,alpha = 0.2, fill = palblues[1])+
geom_point(aes(alpha=0.4)) +
theme(legend.position = "none") +
geom_hline(yintercept = -log10(0.05))+
geom_label_repel(aes(label=ifelse(adj.p.value<0.05&abs(estimate.cor)>0.1,Cytokine,'')))+
t2 +
labs(title="Correlation between CLL-PD and response to Stimuli", x = "Pearson Correlation coefficient", y = "-log10(p-val)")+
guides(color="none", alpha="none")
# Stimuli<-"Resiquimod"
Stimuli_list<- arrange(Stim_PD.corr_all, estimate.cor) %>%
select(Cytokine) %>%
unlist() %>%
rev()
for( Stimuli in Stimuli_list){
Pearson_Correlation<-Stim_PD.corr_all %>%
filter(Cytokine==Stimuli) %>%
select(estimate.cor) %>%
unlist() %>%
round(2)
scatterplot<-left_join(CLL_PD, df, by = "PatientID") %>%
left_join(patMeta, by = "PatientID") %>%
filter(Cytokine==Stimuli,
!is.na(IGHV.status), !is.na(trisomy12)) %>%
ggplot(aes(x=CLL_PD, y=Log))+
geom_point(aes(color=IGHV.status, shape=trisomy12))+
geom_smooth(method = "lm", formula = 'y ~ x')+
stat_cor(method = "pearson")+
# geom_text(x=-1, y=2, label=paste0("Pearson Correlation Coefficient: ", Pearson_Correlation), size=4)+
t2+
ggtitle(paste0(Stimuli))
print(scatterplot)
}
# Stimuli<-"Resiquimod"
for( Stimuli in Stimuli_list){
Pearson_Correlation<-Stim_PD.corr_all %>%
filter(Cytokine==Stimuli) %>%
select(estimate.cor) %>%
unlist() %>%
round(2)
scatterplot<-left_join(CLL_PD, df, by = "PatientID") %>%
left_join(patMeta, by = "PatientID") %>%
filter(Cytokine==Stimuli,
!is.na(IGHV.status), !is.na(trisomy12)) %>%
ggplot(aes(x=CLL_PD, y=Log))+
geom_point(aes(color=IGHV.status, shape=trisomy12))+
geom_smooth(method = "lm", formula = 'y ~ x')+
stat_cor(method = "pearson")+
# geom_text(x=-1, y=2, label=paste0("Pearson Correlation Coefficient: ", Pearson_Correlation), size=4)+
t2+
ggtitle(paste0(Stimuli))+
facet_wrap(~IGHV.status)
print(scatterplot)
}
# Stimuli<-"Resiquimod"
for( Stimuli in Stimuli_list){
Pearson_Correlation<-Stim_PD.corr_all %>%
filter(Cytokine==Stimuli) %>%
select(estimate.cor) %>%
unlist() %>%
round(2)
scatterplot<-left_join(CLL_PD, df, by = "PatientID") %>%
left_join(patMeta_cl, by = "PatientID") %>%
filter(Cytokine==Stimuli,
!is.na(IGHV.status), !is.na(trisomy12)) %>%
ggplot(aes(x=CLL_PD, y=Log))+
geom_point(aes(color=IGHV.status, shape=trisomy12))+
geom_smooth(method = "lm", formula = 'y ~ x')+
stat_cor(method = "pearson")+
# geom_text(x=-1, y=2, label=paste0("Pearson Correlation Coefficient: ", Pearson_Correlation), size=4)+
t2+
ggtitle(paste0(Stimuli))+
facet_wrap(~Cluster)
print(scatterplot)
}
left_join(CLL_PD, df, by = "PatientID") %>%
left_join(patMeta_cl, by = "PatientID") %>%
group_by(PatientID, CLL_PD,IGHV.status) %>%
summarise(Cytokine_Overall_Response=mean((Log)), .groups="keep") %>%
ggplot(aes(x=CLL_PD, y= Cytokine_Overall_Response))+
geom_point(aes(color=IGHV.status))+
geom_smooth(method="lm", formula = 'y ~ x')+
stat_cor()+
t2
left_join(CLL_PD, df, by = "PatientID") %>%
left_join(patMeta_cl, by = "PatientID") %>%
filter(Cytokine%in%c("Resiquimod" ,"CpG ,ODN" ,"sCD40L" ,"IL-1b" ,"soluble ,anti-IgM" ,"BAFF" ,"TGF-b1")) %>%
group_by(PatientID, CLL_PD,IGHV.status) %>%
summarise(Cytokine_Overall_Response=mean((Log)), .groups="keep") %>%
ggplot(aes(x=CLL_PD, y= Cytokine_Overall_Response))+
geom_point(aes(color=IGHV.status))+
geom_smooth(method="lm", formula = 'y ~ x')+
stat_cor()+
t2+
ylab("Response to selected Cytokines")
df %>%
group_by(PatientID) %>%
summarise(Cytokine_Overall_Response=mean((Log)), .groups="keep") %>%
left_join(patMeta_cl, by = "PatientID") %>%
left_join(LDT, by = "PatientID") %>%
ggplot(aes(x=Cluster, y=Cytokine_Overall_Response))+
geom_boxplot(aes(fill=Cluster))+
geom_beeswarm()+
t2+
scale_fill_manual(values=colors[1:4]) +
guides(fill="none") +
ylab("Overall Stimuli response") +
stat_compare_means(method="t.test", comparison=list(c(1,2),c(3,4)), size=6, label.x = 1.3, label.y=0.45)+
coord_cartesian(clip="off")
df %>%
group_by(PatientID) %>%
summarise(Cytokine_Overall_Response=mean((Log)), .groups="keep") %>%
left_join(patMeta_cl, by = "PatientID") %>%
left_join(LDT, by = "PatientID") %>%
filter(!is.na(doubling.time)) %>%
ggplot(aes(x=Cytokine_Overall_Response, y= doubling.time))+
geom_point(aes())+
geom_smooth(method="lm", formula = 'y ~ x')+
stat_cor()+
scale_y_log10()+
t2+
xlab("Overall Stimuli response") +
ylab("LDT")
Heatmap of all scaled log(viability) values, for all patient samples and drugs.
########### Viability matrix ################
#make viability matrix for cytokine treatments for patients
viab.mat <- filter(df_full, Drug!="DMSO", Cytokine=="No Cytokine", Drug_Concentration=="High") %>%
dplyr::select(PatientID, Log, Drug) %>%
tidyr::spread(Drug, Log) %>%
as.data.frame()
#make PatientID the row names
rownames(viab.mat) <- unlist(viab.mat[,1]) # the first row will be the header
viab.mat <- viab.mat[,-1]
#Transform matrix
viab.mat <- t(viab.mat)
#keep unscaled matrix
viab.mat.unscaled <- viab.mat
#run scaleCytResp on viability matrix
viab.mat <- scaleCytResp(viab.mat)
#apply deckel function to matrix
#Calculate dendrogram
breaks <- c(seq(-2,2, length.out = 101))
viab.mat.lims <- deckel(viab.mat,
lower = dplyr::first(breaks),
upper = dplyr::last(breaks))
Arrange annotations for heatmap
#Sort heatmap annotations
#Select annotations from patMeta_cl
Heatmap_Annotation <- dplyr::select(patMeta_cl,
PatientID,
IGHV.status,
Methylation_Cluster,
trisomy12,
TP53,
ATM,
treatment,
gender,
Cluster) %>%
#Adjust names/levels for legend
mutate(treatment=case_when(treatment==0~"No",
treatment==1~"Yes")) %>%
mutate(gender=case_when(gender=="f"~"Female",
gender=="m"~"Male")) %>%
mutate(IGHV.status=case_when(IGHV.status=="U"~"Unmutated",
IGHV.status=="M"~"Mutated"))%>%
mutate(Methylation_Cluster=case_when(Methylation_Cluster=="LP"~"Low",
Methylation_Cluster=="IP"~"Intermediate",
Methylation_Cluster=="HP"~"High"))
#Rename columns for legend
colnames(Heatmap_Annotation) <- c("PatientID",
"IGHV status",
"Methylation Cluster",
"trisomy 12",
"TP53",
"ATM",
"Pretreated",
"Sex",
"Cluster")
#make Pat IDs the rownames
Heatmap_Annotation <- as.data.frame(Heatmap_Annotation)
rownames(Heatmap_Annotation) <- unlist(Heatmap_Annotation$PatientID)
#Tidy Up Heatmap Annotation table
Heatmap_Annotation$Cluster <- as.factor(Heatmap_Annotation$Cluster)
Heatmap_Annotation <- dplyr::select(Heatmap_Annotation,-"PatientID")
Define aesthetics for heatmap and annotations
########### Set Heatmap Aesthetics ###############
#Generate red-blue divergent palette
breaks <- breaks %>% `names<-`(
colorRampPalette(c(palblues, "white",palreds))(101))
#Specify colors of annotations
ann_colors =
list(
"IGHV status" = c("Unmutated"=IGHV[1],
"Mutated"=IGHV[2]),
"Methylation Cluster"= c("Low"=Methylation_cluster[1],
"Intermediate"=Methylation_cluster[2],
"High"=Methylation_cluster[3]),
"Sex" = c("Female"=Sex[1],
"Male"=Sex[2]),
"Pretreated" = c("No"=Mutant[1],
"Yes"=Mutant[2]),
"trisomy 12" = c("1"=Mutant[2],
"0"=Mutant[1]),
"ATM" = c("1"=Mutant[2],
"0"=Mutant[1]),
"TP53" = c("1"=Mutant[2],
"0"=Mutant[1]),
"Cluster" = c("1"=colors[1],
"2"=colors[2],
"3"=colors[3],
"4"=colors[4]))
Plot Figure
#Plot heatmap
Cluster_for_Heatmap<-patMeta_cl %>% select(PatientID, Cluster) %>%
column_to_rownames("PatientID") %>%
as.data.frame()
Pigengene::pheatmap.type(t(viab.mat.unscaled),
annRow = Cluster_for_Heatmap,
type="Cluster",
doTranspose=TRUE,
# scale = "row",
# clustering_method = "complete",
cluster_rows = TRUE,
show_colnames = FALSE,
# annotation_col = Heatmap_Annotation,
# annotation_colors = ann_colors,
cellheight = 22,
cellwidth = 4,
breaks = breaks,
color= names(breaks),
border_color=NA,
fontsize=10,
fontsize_row=16
)
Plot Figure
#Plot heatmap
viab.mat.tmp<-viab.mat.unscaled %>%
t() %>%
merge(Cluster_for_Heatmap,. , by=0)%>%
column_to_rownames("Row.names") %>%
filter(Cluster==1) %>%
select(-Cluster) %>%
t()
viab.mat.tmp.ordered<-viab.mat.tmp[ c( "Fludarabine" ,"Nutlin-3a" ,"Luminespib" ,"BAY-11-7085" ,"Pyridone-6" ,"Ralimetinib" ,"I-BET 762" ,"Everolimus" ,"Selumetinib" ,"Idelalisib" , "Ibrutinib" ,"PRT062607"), ]
# row.names(viab.mat.tmp)
C1_drug_heatmap<-pheatmap(viab.mat.tmp.ordered,
annRow = Cluster_for_Heatmap,
type="Cluster",
doTranspose=TRUE,
# scale = "row",
# clustering_method = "complete",
cluster_rows = FALSE,
show_colnames = FALSE,
annotation_col = Heatmap_Annotation,
annotation_colors = ann_colors,
cellheight = 22,
cellwidth = 4,
breaks = breaks,
color= names(breaks),
border_color=NA,
fontsize=10,
fontsize_row=16,
legend=FALSE,
annotation_legend = FALSE,
show_rownames=FALSE,
annotation_names_col=FALSE
)
#Plot heatmap
viab.mat.tmp<-viab.mat.unscaled %>%
t() %>%
merge(Cluster_for_Heatmap,. , by=0)%>%
column_to_rownames("Row.names") %>%
filter(Cluster==2) %>%
select(-Cluster) %>%
t()
viab.mat.tmp.ordered<-viab.mat.tmp[ c( "Fludarabine" ,"Nutlin-3a" ,"Luminespib" ,"BAY-11-7085" ,"Pyridone-6" ,"Ralimetinib" ,"I-BET 762" ,"Everolimus" ,"Selumetinib" ,"Idelalisib" , "Ibrutinib" ,"PRT062607"), ]
# row.names(viab.mat.tmp)
C2_drug_heatmap<-pheatmap(viab.mat.tmp.ordered,
annRow = Cluster_for_Heatmap,
type="Cluster",
doTranspose=TRUE,
# scale = "row",
# clustering_method = "complete",
cluster_rows = FALSE,
show_colnames = FALSE,
annotation_col = Heatmap_Annotation,
annotation_colors = ann_colors,
cellheight = 22,
cellwidth = 4,
breaks = breaks,
color= names(breaks),
border_color=NA,
fontsize=10,
fontsize_row=16,
legend=FALSE,
annotation_legend = FALSE,
show_rownames=FALSE,
annotation_names_col=FALSE
)
#Plot heatmap
viab.mat.tmp<-viab.mat.unscaled %>%
t() %>%
merge(Cluster_for_Heatmap,. , by=0)%>%
column_to_rownames("Row.names") %>%
filter(Cluster==3) %>%
select(-Cluster) %>%
t()
viab.mat.tmp.ordered<-viab.mat.tmp[ c( "Fludarabine" ,"Nutlin-3a" ,"Luminespib" ,"BAY-11-7085" ,"Pyridone-6" ,"Ralimetinib" ,"I-BET 762" ,"Everolimus" ,"Selumetinib" ,"Idelalisib" , "Ibrutinib" ,"PRT062607"), ]
# row.names(viab.mat.tmp)
C3_drug_heatmap<-pheatmap(viab.mat.tmp.ordered,
annRow = Cluster_for_Heatmap,
type="Cluster",
doTranspose=TRUE,
# scale = "row",
# clustering_method = "complete",
cluster_rows = FALSE,
show_colnames = FALSE,
annotation_col = Heatmap_Annotation,
annotation_colors = ann_colors,
cellheight = 22,
cellwidth = 4,
breaks = breaks,
color= names(breaks),
border_color=NA,
fontsize=10,
fontsize_row=16,
legend=FALSE,
annotation_legend = FALSE,
show_rownames=FALSE,
annotation_names_col=FALSE
)
#Plot heatmap
viab.mat.tmp<-viab.mat.unscaled %>%
t() %>%
merge(Cluster_for_Heatmap,. , by=0)%>%
column_to_rownames("Row.names") %>%
filter(Cluster==4) %>%
select(-Cluster) %>%
t()
viab.mat.tmp.ordered<-viab.mat.tmp[ c( "Fludarabine" ,"Nutlin-3a" ,"Luminespib" ,"BAY-11-7085" ,"Pyridone-6" ,"Ralimetinib" ,"I-BET 762" ,"Everolimus" ,"Selumetinib" ,"Idelalisib" , "Ibrutinib" ,"PRT062607"), ]
# row.names(viab.mat.tmp)
C4_drug_heatmap<-pheatmap(viab.mat.tmp.ordered,
annRow = Cluster_for_Heatmap,
type="Cluster",
doTranspose=TRUE,
# scale = "row",
# clustering_method = "complete",
cluster_rows = FALSE,
show_colnames = FALSE,
annotation_col = Heatmap_Annotation,
annotation_colors = ann_colors,
cellheight = 22,
cellwidth = 4,
breaks = breaks,
color= names(breaks),
border_color=NA,
fontsize=10,
fontsize_row=16
)
wrap_elements(arrangeGrob(C1_drug_heatmap[[4]]))+
wrap_elements(arrangeGrob(C2_drug_heatmap[[4]]))+
wrap_elements(arrangeGrob(C3_drug_heatmap[[4]]))+
wrap_elements(arrangeGrob(C4_drug_heatmap[[4]]))+
plot_layout(nrow=1, widths = c(0.93,0.13,0.52,1.03))
## Appendix
Sys.info()
## sysname release version nodename
## "Windows" "10 x64" "build 19042" "DESKTOP-BF7AU2H"
## machine login user effective_user
## "x86-64" "PeterBruch" "PeterBruch" "PeterBruch"
sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19042)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=German_Germany.1252 LC_CTYPE=German_Germany.1252
## [3] LC_MONETARY=German_Germany.1252 LC_NUMERIC=C
## [5] LC_TIME=German_Germany.1252
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] forcats_0.5.1 stringr_1.4.0
## [3] purrr_0.3.4 readr_2.1.2
## [5] tibble_3.1.6 tidyverse_1.3.1
## [7] dplyr_1.0.7 msigdbr_7.4.1
## [9] org.Hs.eg.db_3.11.4 AnnotationDbi_1.50.3
## [11] clusterProfiler_3.16.1 ConsensusClusterPlus_1.52.0
## [13] glmnet_4.1-3 Matrix_1.3-2
## [15] survminer_0.4.9 ggpubr_0.4.0.999
## [17] survival_3.2-13 DESeq2_1.28.1
## [19] SummarizedExperiment_1.18.2 DelayedArray_0.14.1
## [21] matrixStats_0.61.0 Biobase_2.48.0
## [23] GenomicRanges_1.40.0 GenomeInfoDb_1.24.2
## [25] IRanges_2.22.2 S4Vectors_0.26.1
## [27] BiocGenerics_0.34.0 gridExtra_2.3
## [29] ggfortify_0.4.14 pheatmap_1.0.12
## [31] magrittr_2.0.2 ggbeeswarm_0.6.0
## [33] tidyr_1.2.0 patchwork_1.1.1
## [35] ggrepel_0.9.1 ggplot2_3.3.5
## [37] RColorBrewer_1.1-2 BiocStyle_2.16.1
##
## loaded via a namespace (and not attached):
## [1] bit64_4.0.5 knitr_1.37 data.table_1.14.2
## [4] rpart_4.1-15 RCurl_1.98-1.5 doParallel_1.0.16
## [7] generics_0.1.2 snow_0.4-4 preprocessCore_1.50.0
## [10] cowplot_1.1.1 RSQLite_2.2.9 europepmc_0.4.1
## [13] bit_4.0.4 tzdb_0.2.0 enrichplot_1.8.1
## [16] xml2_1.3.3 lubridate_1.8.0 assertthat_0.2.1
## [19] viridis_0.6.2 xfun_0.29 hms_1.1.1
## [22] jquerylib_0.1.4 babelgene_21.4 evaluate_0.14
## [25] fansi_1.0.2 progress_1.2.2 dbplyr_2.1.1
## [28] readxl_1.3.1 htmlwidgets_1.5.4 Rgraphviz_2.32.0
## [31] km.ci_0.5-2 igraph_1.2.11 DBI_1.1.2
## [34] geneplotter_1.66.0 ellipsis_0.3.2 backports_1.4.1
## [37] bookdown_0.24 markdown_1.1 annotate_1.66.0
## [40] libcoin_1.0-9 vctrs_0.3.8 abind_1.4-5
## [43] cachem_1.0.6 withr_2.4.3 ggforce_0.3.3
## [46] triebeard_0.3.0 bnlearn_4.7 checkmate_2.0.0
## [49] prettyunits_1.1.1 cluster_2.1.1 DOSE_3.14.0
## [52] mltools_0.3.5 crayon_1.4.2 genefilter_1.70.0
## [55] pkgconfig_2.0.3 labeling_0.4.2 tweenr_1.0.2
## [58] nlme_3.1-155 vipor_0.4.5 nnet_7.3-15
## [61] rlang_1.0.1 lifecycle_1.0.1 downloader_0.4
## [64] modelr_0.1.8 cellranger_1.1.0 Cubist_0.4.0
## [67] polyclip_1.10-0 partykit_1.2-15 graph_1.66.0
## [70] urltools_1.7.3 KMsurv_0.1-5 carData_3.0-5
## [73] zoo_1.8-9 base64enc_0.1-3 reprex_2.0.1
## [76] beeswarm_0.4.0 ggridges_0.5.3 png_0.1-7
## [79] viridisLite_0.4.0 bitops_1.0-7 blob_1.2.2
## [82] shape_1.4.6 qvalue_2.20.0 jpeg_0.1-9
## [85] rstatix_0.7.0 gridGraphics_0.5-1 ggsignif_0.6.3
## [88] scales_1.1.1 memoise_2.0.1 plyr_1.8.6
## [91] gdata_2.18.0 zlibbioc_1.34.0 compiler_4.0.5
## [94] scatterpie_0.1.7 cli_3.1.1 XVector_0.28.0
## [97] htmlTable_2.4.0 Formula_1.2-4 MASS_7.3-55
## [100] mgcv_1.8-38 WGCNA_1.70-3 tidyselect_1.1.1
## [103] stringi_1.7.6 highr_0.9 yaml_2.2.2
## [106] GOSemSim_2.14.2 locfit_1.5-9.4 latticeExtra_0.6-29
## [109] survMisc_0.5.5 Pigengene_1.14.0 grid_4.0.5
## [112] sass_0.4.0 fastmatch_1.1-3 tools_4.0.5
## [115] rstudioapi_0.13 foreign_0.8-81 foreach_1.5.2
## [118] inum_1.0-4 C50_0.1.6 farver_2.1.0
## [121] ggraph_2.0.5 digest_0.6.29 rvcheck_0.2.1
## [124] BiocManager_1.30.16 ggtext_0.1.1 Rcpp_1.0.8
## [127] gridtext_0.1.4 car_3.0-12 broom_0.7.12
## [130] httr_1.4.2 colorspace_2.0-2 rvest_1.0.2
## [133] XML_3.99-0.8 fs_1.5.2 splines_4.0.5
## [136] yulab.utils_0.0.4 graphlayouts_0.8.0 ggplotify_0.1.0
## [139] xtable_1.8-4 jsonlite_1.7.3 dynamicTreeCut_1.63-1
## [142] tidygraph_1.2.0 ggfun_0.0.5 R6_2.5.1
## [145] Hmisc_4.6-0 pillar_1.7.0 htmltools_0.5.2
## [148] glue_1.6.1 fastmap_1.1.0 BiocParallel_1.22.0
## [151] codetools_0.2-18 fgsea_1.14.0 mvtnorm_1.1-3
## [154] utf8_1.2.2 lattice_0.20-41 bslib_0.3.1
## [157] gtools_3.9.2 magick_2.7.3 GO.db_3.11.4
## [160] rmarkdown_2.11 munsell_0.5.0 fastcluster_1.2.3
## [163] DO.db_2.9 GenomeInfoDbData_1.2.3 iterators_1.0.14
## [166] impute_1.62.0 haven_2.4.3 reshape2_1.4.4
## [169] gtable_0.3.0